
***Leclerc, Vink, and Schmeets (2020)
*Replication R.Transcript for: Citizenship Acquisition and Spatial Stratification: analysing immigrant residential mobility in the Netherlands
*This R script includes all the R commands necessary to execute the Figure 1 and 2 of the analysis 
*CSV files can be found in the same harvard dataverse folder

                                        ################plot KM curves###################

#install packages
install.packages("tidyverse")
library("tidyverse")
install.packages("ggplot2")
library(ggplot2)
install.packages("ggthemes")
library("ggthemes")
install.packages("reshape2")
library("reshape2")
install.packages("directlabels")
library("directlabels")
install.packages("openxlsx")
library("openxlsx")
install.packages("gridExtra")
library("gridExtra")
install.packages("dotwhisker")
library(dotwhisker)

######### Figure 1 ##########

#important data total
data_total <- read.csv("Excel KM main model.csv", sep=",", dec = ".")
data_total

data_naturalised<- read.csv("EXCEL KM Naturalised.csv", sep=",", dec = ".")
data_naturalised

data_not_naturalised<- read.csv("EXCEL KM Not Naturalised.csv", sep=",", dec = ".")
data_not_naturalised

#convert to long format
data_total_long <- melt(data_total, id="�..Year")
data_total_long

attach(data_total_long)

data_total_long$variable2 <- NA
data_total_long$variable2[data_total_long$variable== "Not.naturalised"] <- "Not naturalised"
data_total_long$variable2[data_total_long$variable== "Naturalised"] <- "Naturalised"



data_naturalised_long <- melt(data_naturalised, id="Year")
data_naturalised_long
data_naturalised_long$Naturalisation <- "Naturalised"


data_not_naturalised_long <- melt(data_not_naturalised, id="Year")
data_not_naturalised_long
data_not_naturalised_long$Naturalisation <- "Not Naturalised"


#plot total:


p3 <- ggplot(data=data_total_long,
             aes(x=�..Year, y=value, colour=variable)) +
  geom_line()+
  theme_bw()+
  xlab("Years since migration") + ylab("Probability of survival")+
  theme(axis.title = element_text(size = 12.5),
        legend.position="none")+
  scale_colour_manual(values = c("red", "blue"))+
  scale_x_continuous(limits=c(0, 16)) +
  geom_dl(aes(label = variable2), method = list(dl.trans(x = x + 0.2), "last.points", cex = 1))


p3

#merge datasets naturalised and not naturalised
data_mobility_long_all <- rbind(data_not_naturalised_long,data_naturalised_long)
data_mobility_long_all


data_mobility_long_all$variable2 <- 0
data_mobility_long_all$variable2 <- data_mobility_long_all$variable

data_mobility_long_all$variable[data_mobility_long_all$variable2=="Home.ownership"] <- 'Home ownership'




#plot

lp <- ggplot(data_mobility_long_all, aes(x = Year, y = value, colour = Naturalisation)) + 
  geom_line()+
  theme_bw()+
  xlab("Years since migration") + ylab("Kaplan-Meier failure curve")+
  scale_y_continuous(limits=c(0, 0.6)) +
  scale_x_continuous(limits=c(0, 16.5)) +
  theme(plot.title = element_text(size = 12),
        axis.title = element_text(size = 12.5),
        legend.position="none")+
  scale_colour_manual(values = c("blue", "red")) +
  geom_dl(aes(label = Naturalisation), method = list(dl.trans(x = x + 0.2), "last.points", cex = 1))

lp

lp2 <- lp + facet_grid(rows = vars(variable), space = "free_x")
lp2

#combine p1 en lp2

p4 <- combi_plot <- grid.arrange(arrangeGrob(p3, lp2, nrow = 1, widths=c(1.3, 1.1))) 
p4



###### Figure 2 #######


sub_groups <- read.csv("Sub_groups.csv", sep=",", dec = ".")

sub_groups$term <- sub_groups$�..term
sub_groups <- dwplot(sub_groups, dot_args = list(size = 6), whisker_args = list(size = 1.8)) +
  geom_vline(xintercept = 1, colour = "grey60", linetype = 5) +
  xlab("Hazard ratio") + ylab("") + xlim(c(0.90, 3.9)) +
  theme(plot.title = element_text(face="plain", size = 16, hjust = 0.5),
        axis.text=element_text(size=16),
        axis.title = element_text(size = 16),
        legend.position = "right",
        legend.justification = c(0, 0),
        legend.key.size = unit(22, "pt"),
        legend.key.width = unit(6,"pt"),
        legend.key = element_rect(fill = "white", color = NA),
        legend.text = element_text(size=16),
        legend.title = element_blank())+
  scale_colour_manual(values = c("red", "green", "blue", "orange"))

sub_groups


mobility <- read.csv("Types_of_mobility.csv", sep=",", dec = ".")

mobility$term <- mobility$�..term

mobility <- dwplot(mobility, dot_args = list(size = 6), whisker_args = list(size = 1.8)) +
  geom_vline(xintercept = 1, colour = "grey60", linetype = 5) +
  xlab("Hazard ratio") + ylab("") + xlim(c(0.90, 1.8)) +
  theme(plot.title = element_text(face="plain", size = 16, hjust = 0.5),
        axis.text=element_text(size=16),
        axis.title = element_text(size = 16),
        legend.position = "right",
        legend.justification = c(0, 0),
        legend.key.size = unit(22, "pt"),
        legend.key.width = unit(6,"pt"),
        legend.key = element_rect(fill = "white", color = NA),
        legend.text = element_text(size=16),
        legend.title = element_blank())+
  scale_colour_manual(values = c("red", "green", "blue", "orange"))

mobility


plot_grid <- grid.arrange(sub_groups, mobility)




                                                                                  
                                                                                